Data Visualization Project 02

Importing Data Set

Here I import the spacex.csv data set into the R Markdown file and load my libraries. This data set covers SpaceX launches from 2010 to 2018. It contains information including date, launch site, payload size, mission outcome, and more.

library(tidyverse)
library(sf)
library(plotly)
library(dplyr)

# Load the raw launch table and derive the columns used throughout the
# analysis: a parsed Date, the launch Year, a logical success flag, and a
# numeric payload mass.
spacex <- read_csv("../data/spacex.csv") %>%
  mutate(
    # Raw dates look like "June 4, 2010"; %B matches the full month name in
    # the current locale.
    Date       = as.Date(Date, format = "%B %d, %Y"),
    # Namespaced so the call does not depend on lubridate being attached
    # (library(tidyverse) only attaches it from tidyverse 2.0.0 onwards).
    Year       = lubridate::year(Date),
    # TRUE only when the outcome is exactly "Success".
    success    = `Mission Outcome` == "Success",
    # Payload mass arrives as text such as "3,170 "; parse_number() drops the
    # comma and whitespace and returns a double (missing values stay NA).
    payload_kg = parse_number(`Payload Mass (kg)`)
  )
glimpse(spacex)
## Rows: 51
## Columns: 14
## $ `Flight Number`     <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",…
## $ Date                <date> 2010-06-04, 2010-12-08, 2012-05-22, 2012-10-08, 2…
## $ `Time (UTC)`        <time> 18:45:00, 15:43:00, 07:44:00, 00:35:00, 15:10:00,…
## $ `Booster Version`   <chr> "F9 v1.0", "F9 v1.0", "F9 v1.0", "F9 v1.0", "F9 v1…
## $ `Launch Site`       <chr> "CCAFS LC-40", "CCAFS LC-40", "CCAFS LC-40", "CCAF…
## $ Payload             <chr> "Dragon Spacecraft Qualification Unit", "Dragon de…
## $ `Payload Mass (kg)` <chr> NA, NA, "525 ", "500 ", "677 ", "500 ", "3,170 ", …
## $ Orbit               <chr> "LEO", "LEO", "LEO", "LEO", "LEO", "Polar orbit", …
## $ Customer            <chr> "SpaceX", "NASA (COTS) NRO", "NASA (COTS)", "NASA …
## $ `Mission Outcome`   <chr> "Success", "Success", "Success", "Success", "Succe…
## $ `Landing Outcome`   <chr> "Failure (parachutes)", "Failure (parachutes)", "N…
## $ Year                <dbl> 2010, 2010, 2012, 2012, 2013, 2013, 2013, 2014, 20…
## $ success             <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ payload_kg          <dbl> NA, NA, 525, 500, 677, 500, 3170, 3325, 2296, 1316…

Data Summary

In this section I summarized the SpaceX data to see:

- Number of launches per year
- Success rate per launch site
- Average payload mass by booster version

# Launches per year: number of launches and share of successful missions.
launches <- spacex %>%
  # Date is already parsed and Year already derived at import, so reuse Year
  # instead of re-parsing the date; the cast keeps the key an integer column.
  group_by(Year = as.integer(Year)) %>%
  summarize(
    `Number of Launches` = n(),
    `Success Rate`       = mean(success, na.rm = TRUE),
    # Return an ungrouped tibble explicitly, like the other summaries here.
    .groups = "drop"
  ) %>%
  arrange(Year)

launches
## # A tibble: 8 × 3
##    Year `Number of Launches` `Success Rate`
##   <int>                <int>          <dbl>
## 1  2010                    2          1    
## 2  2012                    2          1    
## 3  2013                    3          1    
## 4  2014                    6          1    
## 5  2015                    7          0.857
## 6  2016                    8          1    
## 7  2017                   18          1    
## 8  2018                    5          0.8
# Success rate per launch site, busiest site first.
# Step 1: one group per distinct launch-site label.
per_site <- group_by(spacex, `Launch Site`)

# Step 2: launch count and share of missions whose outcome is "Success".
per_site <- summarise(
  per_site,
  `Total Launches` = n(),
  `Success Rate`   = mean(`Mission Outcome` == "Success", na.rm = TRUE),
  .groups          = "drop"
)

# Step 3: order by launch count, largest first.
success <- arrange(per_site, desc(`Total Launches`))

success
## # A tibble: 5 × 3
##   `Launch Site`                                  `Total Launches` `Success Rate`
##   <chr>                                                     <int>          <dbl>
## 1 CCAFS LC-40                                                  26          0.962
## 2 KSC LC-39A                                                   13          1    
## 3 VAFB SLC-4E                                                   8          1    
## 4 CCAFS SLC-40                                                  3          1    
## 5 CCAFS SLC-40 (after static fire on KSC LC-39A)                1          0
# Average payload mass per booster version, heaviest first.
payload <- spacex %>%
  group_by(Booster = `Booster Version`) %>%
  summarise(
    # payload_kg already holds the parsed numeric mass, so the raw text
    # column does not need to go through parse_number() a second time.
    `Average Payload (kg)` = mean(payload_kg, na.rm = TRUE),
    `Number of Launches`   = n(),
    .groups = "drop"
  ) %>%
  # A booster whose payload masses are all missing averages to NaN; drop
  # those rows before sorting.
  filter(!is.na(`Average Payload (kg)`)) %>%
  arrange(desc(`Average Payload (kg)`))

payload
## # A tibble: 31 × 3
##    Booster       `Average Payload (kg)` `Number of Launches`
##    <chr>                          <dbl>                <int>
##  1 F9 B4 B1041.1                   9600                    1
##  2 F9 FT B1029.1                   9600                    1
##  3 F9 FT B1036.1                   9600                    1
##  4 F9 FT B1036.2                   9600                    1
##  5 F9 FT B1037                     6761                    1
##  6 F9 B4 B1044                     6092                    1
##  7 F9 FT B1034                     6070                    1
##  8 F9 FT B1030                     5600                    1
##  9 F9 FT B1021.2                   5300                    1
## 10 F9 FT B1020                     5271                    1
## # ℹ 21 more rows

Interactive Data Visualization

# Interactive scatter of payload mass against flight number, one marker per
# launch. payload_kg is created when the data are imported, so it is not
# recomputed here.
plot_ly(
  data = spacex,
  # Declare the trace type explicitly; otherwise plotly has to guess it and
  # emits a "No trace type specified" warning.
  type = "scatter",
  mode = "markers",
  # Flight Number is read as text, so convert it for a numeric axis.
  x    = ~as.numeric(`Flight Number`),
  y    = ~payload_kg,
  size = ~payload_kg,
  color   = ~`Launch Site`,
  # Hover label assembled per launch; hoverinfo = "text" shows only this.
  text = ~paste0(
    "Flight #: ", `Flight Number`, "<br>",
    "Date: ", Date, "<br>",
    "Booster: ", `Booster Version`, "<br>",
    "Site: ", `Launch Site`, "<br>",
    "Payload: ", payload_kg, " kg<br>",
    "Outcome: ", `Mission Outcome`
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "SpaceX Payload by Flight",
    xaxis = list(title = "Flight Number"),
    yaxis = list(title = "Payload Mass (kg)"),
    legend = list(title = list(text = "<b>Launch Site</b>"))
  )

Spatial Visualization

# Country polygons from the ne_110m_admin_0_countries shapefile, keeping only
# the feature(s) whose ISO_A3 code is "USA".
world_shapes <- read_sf(
  "../data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp",
  quiet = TRUE
)
world_shapes <- filter(world_shapes, ISO_A3 == "USA")

# Longitude/latitude lookup for the launch pads, keyed by the launch-site
# label used in the data.
site_coords <- tibble(
  site = c("CCAFS LC-40", "CCAFS SLC-40", "KSC LC-39A", "VAFB SLC-4E"),
  lon  = c(-80.577366, -80.583333, -80.604333, -120.610829),
  lat  = c(28.561871, 28.583330, 28.608389, 34.632092)
)


# Per-site launch count and share of missions whose outcome is "Success";
# the site label is renamed to `site` so it can be joined to site_coords.
success_by_site <- summarise(
  group_by(spacex, site = `Launch Site`),
  total_launches = n(),
  success_rate   = mean(`Mission Outcome` == "Success", na.rm = TRUE),
  .groups        = "drop"
)

# Attach coordinates to the per-site summary and convert it to an sf point
# layer in EPSG:4326.
# Fix the RNG seed so the random jitter below, and therefore the map, is the
# same on every knit.
set.seed(2018)
sites_sf <- success_by_site %>%
  left_join(site_coords, by = "site") %>%
  # Site labels with no entry in site_coords come out of the join with NA
  # coordinates and are dropped here.
  filter(!is.na(lon), !is.na(lat)) %>%
  # Shift every point by up to 0.1 degrees in each direction (presumably to
  # separate pads that sit almost on top of each other).
  mutate(
    lon = lon + runif(n(), -0.1, 0.1),
    lat = lat + runif(n(), -0.1, 0.1)
  ) %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

# Map of launch sites over the US outline: point size encodes the number of
# launches, point colour the success rate.
ggplot() +
  # Base layer: country outline.
  geom_sf(data = world_shapes, fill = "gray95", color = "gray80") +
  # Launch-site points.
  geom_sf(
    data = sites_sf,
    aes(size = total_launches, color = success_rate),
    alpha = 0.8
  ) +
  scale_size(range = c(1, 5)) +
  # success_rate is a proportion, so label the colour bar as percentages.
  scale_color_viridis_c(labels = scales::percent_format(accuracy = 1)) +
  labs(
    # The launch data run from 2010 to 2018, so the title states that range.
    title    = "SpaceX Launch Sites in the USA (2010–2018)",
    subtitle = "Point size = total launches, color = success rate",
    size     = "Total\nLaunches",
    color    = "Success\nRate"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.major = element_line(color = "white"),
    plot.title       = element_text(face = "bold", size = 16),
    plot.subtitle    = element_text(size = 12)
  )

Visualization of a Model

library(broom)       # for tidy()
library(dotwhisker)  # for dwplot()


# Model on a copy in which Year is called launch_year, so the model term
# cannot be confused with lubridate::year().
spacex2 <- rename(spacex, launch_year = Year)

# Logistic regression of mission success on payload mass and launch year.
log_mod <- glm(
  formula = success ~ payload_kg + launch_year,
  family  = binomial,
  data    = spacex2
)

# Raw launch outcomes (vertically jittered) with a logistic curve of success
# against payload mass drawn on top.
success_plot <- ggplot(spacex, aes(x = payload_kg, y = as.numeric(success))) +
  # x and y are inherited from ggplot(); only colour is specific to the
  # points, so the fitted curve below is not split by outcome.
  geom_jitter(
    aes(colour = success),
    height = 0.02,
    alpha  = 0.6,
    size   = 2
  ) +
  scale_colour_manual(
    "Launch Outcome",
    values = c(
      "TRUE"  = "forestgreen",
      "FALSE" = "firebrick"
    )
  ) +
  # Logistic fit computed by ggplot from the plotted x and y alone
  # (payload mass only); it is separate from log_mod.
  stat_smooth(
    method      = "glm",
    method.args = list(family = "binomial"),
    se          = TRUE,
    color       = "forestgreen",
    fill        = alpha("palegreen", 0.3)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Success Probability vs. Payload Mass",
    x     = "Payload Mass (kg)",
    y     = "Probability of Success"
  ) +
  theme_minimal()

# Display the figure in the document.
success_plot

# Make sure the output folder exists before writing, and pass the plot
# explicitly instead of relying on last_plot().
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "figures/success_prob_vmass.png",
  plot   = success_plot,
  width  = 6,
  height = 4,
  units  = "in",
  dpi    = 300
)

# Coefficient plot for log_mod: tidy the fit with confidence intervals, drop
# the intercept row, and draw the remaining terms as a dot-and-whisker plot.
model_terms <- filter(
  tidy(log_mod, conf.int = TRUE),
  term != "(Intercept)"
)

dwplot(model_terms, point_size = 3) +
  labs(
    title = "Predictors of Launch Success",
    x     = "Log-Odds Coefficient",
    y     = ""
  ) +
  theme_minimal()

The first visualization (“Success Probability vs. Payload Mass”) shows a fitted logistic curve overlaid on the raw jittered success/failure points. Notice how the green line sits near 100 % success for almost the entire range of payloads and dips only slightly at the highest masses. In other words, payload mass does not appear to affect whether a flight succeeds (the 95 % confidence ribbon is very wide at the far end, but the fitted curve is essentially flat).

The second visualization (“Predictors of Launch Success”) is a coefficient plot of the logistic regression model. It shows the log-odds estimates (with confidence intervals) for both payload_kg and launch_year, excluding the intercept. Both coefficients sit close to zero and their intervals overlap zero, indicating that neither payload nor year is a strong predictor of success.